;File: CORELFLT.ASM, Last edition: 20-SEP-1995
;
;Noise reduction filter based on the autocorrelation function
;especially suitable for CW
;(c) 1994,1995 Pawel Jalocha, SP9VRC,
;e-mail: jalocha@chopin.ifj.edu.pl, sp9vrc@gw.sp9kby.ampr.org
;
;This software is not to be used for purposes other than amateur radio
;and SWL without the permission of the author.
;Usage for profit or in commercial/military products is explicitly prohibited.
;
;It is indeed not obvious to me, which parameter set is best
;for speech processing... thus I leave the research for the user.
;
;For speech with little distortion try CorelLen=15, IntegLen=128 at 16kHz or 9.6kHz sampling.
;For hunting weak carriers use very large values like: CorelLen=127, IntegLen=7200
;This file as distributed is optimized for CW.
;
;The "boiling water" effect gets smaller with increasing IntegLen.
;
;A larger CorelLen lets you enhance signals correlated on a larger time-scale
;but it needs more CPU power. If you get to the edge, try to move to
;a lower sampling rate - it will make the correlation time-scale larger
;already and it will let you make the CorelLen even larger.
;
;If you feel that the filter enhances the low audio frequencies too much,
;activate the OutDiff option.
;
;For filtering digital transmissions like RTTY I suggest setting IntegLen to
;about the symbol time (20ms for 50 baud, 20ms = 192 samples at 9.6 kHz
;sampling) and the CorelLen to about 1/4 IntegLen.
;Another approach would be to set the IntegLen many times larger
;than the symbol time.
;
;The filter's equations are:
; Auto-correlation function: C(T) = LowPass inp(t)*inp(t-T) over t
;  for T = 0..CorelLen, LowPass integration time = IntegLen
; Window: W(T)=(1.0-x^2)^2 where x=T/(CorelLen+1) for T = -CorelLen..+CorelLen
;  (the code as distributed builds a sin(x)^2 window instead, see AlternateWindow)
; FIR filter coefficients:   F(T) = W(T) * C(|T|) for T = -CorelLen..+CorelLen
; Signal filtering:  out(t) = sum inp(t-T-ProcDelay)*F(T) over T = -CorelLen..+CorelLen
;
;One could possibly experiment with different windows.
;Making the window negative for certain T will probably attenuate or even
;remove sounds correlated on these time-lengths. Thus one could make
;a "speech-pass, carrier-remove" type filter.
;The few experiments I have made did not confirm the above :-)
;
;To minimize round-off errors in the computing of the autocorrelation
;function use signal levels close to the CODEC's range
;(or let the AGC do the job)
;
;One comment on the DSPCARD4's noise:
;I still haven't put my card into a metal box, so it makes lot of noise...
;However I have found that connecting two 1nF ceramic capacitors
;from the ground (29,30,31,32,64) to the "-IO" (28) and "+IO" (60) lines
;_greatly_ reduces that noise. I connected the capacitors straight on
;the Euro-connector plug. The numbers given are the pins as on the diagram
;on page 62 of the DSP card 4 User's Manual (Apr-94).
;


	nolist
	include 'leonid'
	list
	title 'Noise reductor by SP9VRC'

RedLED  macro mode      ; RedLED clr/set/chg
			;expands to one bit instruction (bclr/bset/bchg) on
			;bit 13 of X:$FFE4 - the same port register whose
			;bit 8 is tested as "PB8" for ExternalEnable
	b\mode #13,X:$FFE4
	endm

EVM56K  equ     1       ;0 => DSPCARD4 by AlefNull
                        ;1 => EVM56K by Motorola (J12 in 16K position)
                        ;the difference is in external X/Y/L RAM allocations
                        ;(it also selects MIC or LINE input at CODEC start-up)

SampleFreq equ 8000.0   ;Sampling rate in Hz (handed to opencd in kHz).
                        ;A lower rate makes the effective integration times
                        ;larger but limits the passband.

BufLen  equ     64      ;sample buffer length in frames
                        ;(the CODEC buffers are declared as BufLen*4 words)
BatchLen equ     8      ;processing batch length (must be > 1 and a power of 2)
			;the FIR filter is recomputed once per batch,
                        ;thus making the batch large makes the code a bit more
			;efficient. On the other hand you may find
			;some degradation in the filter's performance.
			;The "imperfection" is proportional to the ratio:
			;BatchLen/IntegLen, thus for large IntegLen
			;it is safer to increase the BatchLen.
BatchLenLog equ @cvi(@log(BatchLen)/@log(2)+0.5) ;BatchLen = 2 ^ BatchLenLog

;DC-remove filter time constant
DCremove   equ 0        ;activate the DC level subtractor
DCrise     equ 2048     ;DC rise time in samples
			;the larger, the slower the DC follows
			;(the per-batch filter weight is BatchLen/DCrise)
;AGC parameters
RMSrise    equ  512     ;RMS rise time in samples
			;the larger, the slower the RMS follows
			;(the per-batch filter weight is BatchLen/RMSrise)
AGCenable  equ 1        ;automatic adjustment of the input amplification
AGCstereo  equ 0        ;take both or only the left channel for the AGC
AGCalert   equ 0        ;indicate too low or too high audio levels with the red LED
AGChold  equ  8000      ;AGC hold time in samples
AGCfall  equ  1000      ;AGC fall time in samples/AGC unit (1.5dB)
RMSmin   equ 0.040      ;minimum and maximum RMS values for the front-end AGC
RMSmax   equ 0.100      ;do not make these closer than by a factor of 2
                        ;the "RMS" values are really mean squares,
                        ;that is without the square root

InpDiff       equ 0     ;differentiate the input signal ("normal" value = 0)
OutDiff       equ 0     ;differentiate the output signal ("normal" value = 0)
FilterNorm    equ 1     ;normalize the formed filter (rather set this to 1)

ExternalEnable equ 0    ;switch the filter on/off via the PB8 pin.

CorelLen    equ 127     ;autocorrelation is computed up to this time difference
			;large values distort the speech more, but reduce the noise at the same time
			;at 9.6kHz sampling 127 is OK, but 150 is just
                        ;a bit too much for a 27MHz DSP56001
                        ;a 40MHz DSP56002 (EVM56K) does 256 at 8kHz sampling

CorelLenLog equ @cvi(@log(CorelLen)/@log(2)+0.5)
			;CorelLen = 2^CorelLenLog (or at least approximately)
			;used to scale the FIR output (shift by CorelLenLog-1)

IntegLen    equ   256   ;autocorrelation is averaged over this period
			;you can make this very large and it will not
			;influence the computational complexity of the code
			;(it only enters the LowPassW0..W2 constants)

ProcDelay   equ   IntegLen+CorelLen/2
			;This delay is there to retard the samples until after
			;the FIR filter is stabilized. It is not essential for
			;the operation of the filter, but I believe it enhances
			;its performance. The ProcDelay can not be made too large
			;(note the RingBuffLen).

Notch       equ    0    ;remove or enhance the self-correlated components
			;but this doesn't really work for the moment...

RingBuffLen equ 2048    ;must be larger than ProcDelay+2*CorelLen
			;we anyway have 16KB Y-RAM thus we can use up 8K here.

AlternateWindow equ 0   ;0 => window = sin(x)^2           x=0..PI
                        ;1 => window = 3/4*sin(x) - 1/4*sin(3*x)

SuperSuppression equ 0  ;experimental option, modifies the window
                        ;to completely suppress the short-term correlated
                        ;components.
                        ;works only for AlternateWindow=0

SPY     equ     1       ;spy on the autocorrelation function shape
			;(assembles the SpySync/SpyA serial dump code)

        LOMEM P:$0000           ;program memory limits for the assembler
        HIMEM P:$1FFF

	org     p:user_code     ;user_code is not defined in this file
				;(presumably by the 'leonid' include)

        jmp <Initialize         ;entry point: set everything up first,
				;Initialize ends with a jump to BatchLoop

;Main loop: every pass processes one batch of BatchLen sample frames.
;r2 addresses the CODEC buffers; with n2=3 (set in Initialize) the pair
;(r2)+ / (r2)+n2 steps through the two input words of a frame and on to
;the next 4-word frame.
BatchLoop
	waitblk r2,BufLen,BatchLen      ;wait till enough samples for one batch
					;the following code should use r2
					;for addressing the samples
					;r7,m7 must not be used: SSI interrupts
					;r3,m3 must not be used: SCI interrupts and LEONID code
				
        if DCremove
                                ;compute DC levels
                                ;a sums the first (left) word of every frame,
                                ;b sums the second (right) word
	clr a  r2,x1            ;clear sums, save r2
	clr b  X:(r2)+,x0
	if BatchLen>1
	.loop #BatchLen-1       ;average samples with the batch
	  add x0,a X:(r2)+n2,x0
	  add x0,b X:(r2)+,x0
	.endl
	endif
	add x0,a X:(r2)+n2,x0   ;last frame of the batch
	add x0,b
	move x1,r2              ;restore r2

	if BatchLenLog>0
	.loop #BatchLenLog      ;scale the average
	  asr a
	  asr b
	.endl           ;now: a = left DC, b = right DC
	endif

				;first order low pass on the batch averages:
				;DC = DC*(1-k) + average*k  with k = BatchLen/DCrise
	rnd a #(1.0-1.0/DCrise*BatchLen),y0           ;further DC filter
	rnd b #1.0/DCrise*BatchLen,y1
	move Y:<DCleft,x0
	mpy x0,y0,a  a,x0       ;the parallel move saves the average to x0
	macr x0,y1,a  Y:<DCright,x0
	mpy x0,y0,b  b,x0
	macr x0,y1,b  a,Y:<DCleft
	move b,Y:<DCright
			;now subtract the DCs from the data
			;(the input samples are modified in place)
	move a,y0
	move b,y1  X:(r2),a
	move r2,x1      ;save r2
	.loop #BatchLen
	  sub y0,a
	  move a,X:(r2)+
	  move X:(r2),b
	  sub y1,b  X:(r2+n2),a ;prefetch the left sample of the next frame
	  move b,X:(r2)+n2
	.endl
	move x1,r2              ;restore r2

        endif   ;of DCremove

        if AGCenable
                                ;Front-end AGC: measure the mean square of the
                                ;batch, smooth it, and step the CODEC's input
                                ;gain up or down to keep it within RMSmin..RMSmax
                                ;sum up the RMSes of both channels
	clr a  r2,x1            ;clear sums, save r2
	clr b  X:(r2)+,x0
	if BatchLen>1
	.loop #BatchLen-1       ;sum up the signal squares
	  mac x0,x0,a X:(r2)+n2,x0
	  mac x0,x0,b X:(r2)+,x0
	.endl
	endif
	mac x0,x0,a X:(r2)+n2,x0
	mac x0,x0,b
	move x1,r2              ;restore r2

	if BatchLenLog>0
	.loop #BatchLenLog      ;divide by BatchLen
	  asr a
	  asr b
	.endl           ;now: a = left RMS, b = right RMS
	endif
	
				;RMS = RMS*(1-k) + batch value*k, k = BatchLen/RMSrise
	rnd a #(1.0-1.0/RMSrise*BatchLen),y0  ; filter these RMSes to get smoother rise/fall
	rnd b #1.0/RMSrise*BatchLen,y1
	move X:<RMSleft,x0
	mpy x0,y0,a  a,x0
	macr x0,y1,a  X:<RMSright,x0
	mpy x0,y0,b  b,x0
	macr x0,y1,b  a,X:<RMSleft
	move b,X:<RMSright

CheckRMSmin                     ;are the RMSes below the minimum required ?
	move #RMSmin,x0
	cmp x0,a
	jcc <CheckRMSmax        ;carry clear: left RMS is not below RMSmin
	if AGCstereo
	  cmp x0,b
	  jcc <CheckRMSmax
	endif
GainUp                          ;if so, increase the CODEC's input gain
	move X:<AGCcount,a      ;decrement the timeout
	move #>BatchLen,x0
	sub x0,a  #>AGCfall,x0
	move a,X:<AGCcount
	jgt <CheckRMS_OK        ;leave if not yet zero
	clr a  x0,X:AGCcount    ;reload the counter with AGCfall
	move Y:(r2),a1          ;get the CODEC's input control word
	move #>$0F0F00,x0
	and x0,a                ;extract the gain
	cmp x0,a  #>$010100,x0  ;already maximum ?
	jeq <RMS_Alert          ;if so flash the red LED
	add x0,a  #>$F0F000,x0  ;if not, increment the gain by 1
	move a1,x1              ;x1 = new gain bits
	move Y:(r2),a1          ;and reload all the control words
	and x0,a  n2,x0         ;in the output buffer
	or x1,a  #<4,n2         ;make n2=4 for a moment
	.loop #BufLen           ;one control word per frame, r2 wraps back
	  move a1,Y:(r2)+n2
	.endl
	move x0,n2              ;restore n2
				;0.7071 followed by a left shift = factor 1.4142
	move #0.7071,y0         ;increase the RMSes to follow the gain
	move X:RMSleft,x0       ;increase faster
	mpyr x0,y0,a  
	asl a  X:<RMSright,x0
	mpyr x0,y0,a a,X:<RMSleft
	asl a
	move a,X:<RMSright
	jmp <CheckRMS_OK

CheckRMSmax                     ;are the RMSes above the given maximum ?
	move #>AGChold,x0       ;initialize the AGC hold count-down
	move x0,X:<AGCcount     ;(done whenever the level is not below RMSmin)
	move #RMSmax,x0
	cmp x0,a                ;compare left and right RMS
	if AGCstereo
	  jcc <GainDown
	  cmp x0,b
	endif  
	jcs <CheckRMS_OK        ;carry set: below RMSmax, nothing to do
GainDown                        ;if the RMSes are too high
	clr a                   ;decrease the CODEC's input gain
	move Y:(r2),a1          ;get the CODEC's input control word
	move #>$0F0F00,x0
	and x0,a  #>$010100,x0  ;extract the gain bits
	sub x0,a  #>$F0F000,x0  ;attempt to decrease the gain
	jcs <RMS_Alert          ;jump if overflow
	move a1,x1              ;x1 = new gain bits
	move Y:(r2),a1          ;reload all the input control words
	and x0,a  n2,x0         ;in the buffer with the new input gain
	or x1,a  #<4,n2         ;n2=4 for a moment
	.loop #BufLen
	  move a1,Y:(r2)+n2
	.endl
	move x0,n2              ;restore n2
	move #0.7071,y0         ;decrease the RMSes to follow expected
	move X:<RMSleft,x0       ;gain reduction faster
	mpyr x0,y0,a  X:<RMSright,x0
	mpyr x0,y0,a a,X:<RMSleft
	move a,X:<RMSright
	jmp <CheckRMS_OK

RMS_Alert                       ;the gain is already at its limit
        if AGCalert
          RedLED set
        endif
	jmp <CheckRMSend

CheckRMS_OK
        if AGCalert
          RedLED clr
        endif
CheckRMSend
	
	endif   ; of AGCenable

Process
			;copy samples (left input) into the ring buffer
			;r5 steps downwards, so the newest sample sits at
			;the lowest address: after this loop r5+1 is the
			;newest sample and AuxStore[0] holds the same value
	move r2,x1      ;save r2
	move #(AuxStore+BatchLen-1),r0
	move #$FFFF,m0          ;linear addressing for r0
	move #<1,n0
	.loop #BatchLen
	  move X:(r2)+,a                ;get input sample
	  if InpDiff                    ;differentiate the input if requested
	    move X:<PrevInp,x0
	    sub x0,a  a,X:<PrevInp
	  endif
	  move a,X:(r0)-  a,Y:(r5)-     ;save the sample into the ring buffer (r5)
	  move (r2)+n2                  ;and into the auxiliary buffer
	.endl
	move x1,r2              ;restore r2

				;integrate the autocorrelation
				;one pass of the loop per time difference
				;T = 0..CorelLen; r5 ends up one word higher
				;after every pass
        move #BatchLen,n5       ;over the current batch of samples
	move #AuxStore,r0
	move #BatchLen,n0
	.loop #(CorelLen+1)
	  move (r5)+
	  clr a  X:(r0)+,x0  Y:(r5)+,y0
	  rep #(BatchLen-1)             ;a = sum of new sample * delayed sample
	    mac x0,y0,a  X:(r0)+,x0  Y:(r5)+,y0
	  mac x0,y0,a  (r5)-n5
        if BatchLenLog>2
          rep #BatchLenLog-2   ;with little risk we can avoid this to improve
            asr a              ;the dynamic range for the autocorrelation function
        endif
          rnd a (r0)-n0                 ;r0 back to AuxStore
					;here we are low-passing the inp(t)*inp(t-T)
					;a = input, r4 = aver. filter tap
					;three cascaded stages A0,A1,A2 are
					;kept per time difference in X:(r4)
	  move Y:<LowPassW1,y0
	  move a,x0                     ;  input -> x0
	  mpy x0,y0,a  X:(r4)+,x1       ;  + W1*inp  A0 -> x1
	  move Y:<LowPassW0,y1
	  mac x1,y1,a  X:(r4)-,x0       ;  + W0*A0   A1 -> x0
	  move Y:<LowPassW2,y0
	  macr -x0,y0,a Y:<LowPassW1,y0 ;  - W2*A1
	  mpy  x1,y0,a  a,X:(r4)+       ;  + W1*A0   a -> A0
	  move X:(r4)+,x1               ;  A1 -> x1
	  mac x1,y1,a  X:(r4)-,x0       ;  + W0*A1   A2 -> x0
	  move Y:<LowPassW2,y0
	  macr -x0,y0,a                 ;  - W2*A2
	  mpy x1,y0,a   a,X:(r4)+       ;  + W2*A1   a -> A1
	  macr x0,y1,a                  ;  + W0*A2
	  move a,X:(r4)+                ;  a -> A2, r4 -> next time difference

	.endl
	move #(CorelLen+1),n5
	nop                     ;n5 must not be used right after it is loaded
	move (r5)-n5            ;restore r5
				
				;now we recompute the Filter shape
				;Filter(X:r1) = Window(Y:r1) * autocorrelation
				;the third state variable (A2) of every time
				;difference is taken as the autocorrelation
				;first half: T = CorelLen down to 1
	move (r4)-              ;r4 = last auto-corel elem.

	.loop #CorelLen
	  move X:(r4),x0 Y:(r1),y0      ;x0=auto-corel. shape, y0=window
	  mpyr x0,y0,a  (r4)-n4         ;a=auto-corel. * window
	  move a,X:(r1)+
	.endl
				;centre tap; the unwindowed value is also
				;kept in FilterRef for the normalization
	move X:(r4)+n4,x0 Y:(r1),y0     ;element #0 = signal energy
	mpyr x0,y0,a  x0,X:<FilterRef
	move a,X:(r1)+
;        move (r4)+n4       ; ** TEST ** we take element #1 instead of element #0
;        move X:(r4),x0 Y:(r1),y0
;        mpyr x0,y0,a  x0,X:<FilterRef
;        move a,X:(r1)+
				;second half: T = 1 up to CorelLen
	.loop #CorelLen
	  move X:(r4)+n4,x0 Y:(r1),y0
	  mpyr x0,y0,a
	  move a,X:(r1)+
	.endl
				;r4 wrapped around to element #0 + 2,
				;step it back to the start of the buffer
	move (r4)-
	move (r4)-
				;and now we can pass the data through
				;the formed filter
				;every output is the sum of 2*CorelLen+1
				;products Filter(X:r1) * ring buffer(Y:r5);
				;r5 moves back by one word per output, that is
				;towards the newer samples
	move #(ProcDelay-CorelLen),n5
	move r5,r0              ;save r5
	move (r5)+n5
	move #(2*CorelLen+2),n5
	.loop #BatchLen
	  clr a  X:(r1)+,x0 Y:(r5)+,y0
	  .loop #(2*CorelLen)
	    mac x0,y0,a  X:(r1)+,x0 Y:(r5)+,y0
	  .endl
	  mac x0,y0,a  (r5)-n5
shift = CorelLenLog-1   ;scale the output for the filter length
	  if shift>0
	    rep #shift
	     asr a
	  endif
	  if shift<0
	    rep #(-shift)
	     asl a
	  endif

	if FilterNorm
				;divide the output by the signal energy:
				;shift a and b up together until b is normalized
	  move X:<FilterRef,b    ;now: b=element #0, a=signal
	  tst b
	  jeq <ScaleDone        ;element #0 = 0 ? avoid any scaling
	  jnr <FineScale
ShiftScale asl a                ;scale up
	   asl b                ;until b is normalized
	  jnn <ShiftScale
FineScale
				;every pass multiplies both a and b by (2-b),
				;which drives b towards 1.0
	  move #-1.0,y1
	  .loop #4
	    tfr y1,b  b,x1      ;x1 = b
	    add x1,b  a,x0      ;b = b-1, x0 = a
	    tfr x1,b  b,y0      ;y0 = b-1, b restored
	    macr -x0,y0,a  b,x0 ;a = a - a*(b-1)
	    macr -x0,y0,b       ;b = b - b*(b-1)
	  .endl
ScaleDone
	  endif

	  if Notch              ;this works somehow but...
				;a = sample at the window centre - 0.935*a
	    move #CorelLen+1,n5
	    move a,y0
	    move #0.935,x0      ;why this number ? Don't ask me...
	    move Y:(r5+n5),a
	    macr -x0,y0,a  #(2*CorelLen+2),n5

;            asr a #CorelLen+1,n5
;            neg a  X:<NotchWeight,x0
;            move Y:(r5+n5),y0
;            mac x0,y0,a  #(2*CorelLen+2),n5
;            tfr a,b
;            rep #4
;              asr b
;            tfr x0,b  b,x1
;            mac -x1,y0,b
;            move b,X:<NotchWeight
	  endif
	  rnd a (r2)+           ;r2 = left output
	  if OutDiff
	    move X:<PrevOut,x0
	    sub x0,a  a,X:<PrevOut
	  endif
	  if ExternalEnable
				;bit 8 clear: output the sample at the window
				;centre from the ring buffer instead
	    jset #8,X:<<$FFE4,SwitchDone
	      move #CorelLen+1,n5
	      nop
	      move Y:(r5+n5),a
	      move #(2*CorelLen+2),n5
SwitchDone
	  endif
;          rep #4        ;** TEST ** amplify the signal
;            asl a
	  move a,Y:(r2)+        ;copy output to left
	  move a,Y:(r2)+        ;and right channel
	  move (r2)+            ;r2 -> next frame
	.endl
	move r0,r5              ;restore r5

;        move X:<NotchWeight,a
;        rep #8
;          asr a
;        move a1,x0
;        putc

          if SPY                ;Spy on the autocorrelation function
                                ;when SpySync reports a request (carry clear)
                                ;exactly 512 values are sent through SpyA:
                                ;CorelLen+1 autocorrelation values and
                                ;zeros for the rest
          jsr <SpySync
          jcs <SpyDone
                                ;512 words must follow now
          move (r4)+            ;r4 -> third state variable of element #0
          move (r4)+
          .loop #CorelLen+1
            move L:(r4)+n4,a
            jsr SpyA
            nop                 ;keeps the jsr off the last loop instruction
          .endl
          clr a (r4)-           ;r4 back to the start of the buffer
          move  (r4)-
          .loop #512-(CorelLen+1)
            jsr <SpyA
            nop
          .endl
SpyDone
          endif

        jmp     <BatchLoop      ;go and wait for the next batch

Initialize      ;initialize registers, buffers, windows, etc.
                ;Register roles after this section:
                ; r7,m7 - CODEC buffer pointer for the interrupt routine
                ; r2,n2,m2 - CODEC buffer pointer for the main loop
                ;            (n2 = 3, modulo BufLen*4)
                ; r5,m5 - ring buffer pointer (Y, modulo RingBuffLen)
                ; r4,n4,m4 - autocorrelation state (X, 3 words per element)
                ; r1,n1,m1 - Filter (X) / Window (Y), modulo 2*CorelLen+1

;        andi #%11110011,mr      ;scaling bits = 00

        move                #Buffer+2,r7        ;for the CODEC's interrupt routine
        move                #<BufLen*4-1,m7

        move                #Buffer,r2          ;for us to address the input and output samples
        move                #<4-1,n2
        move                #<BufLen*4-1,m2

        clr a  #RingBuff,r5       ;r5 to address the ring buffer
        move #RingBuffLen-1,m5
                                  ;The loop count goes through x0, so the
                                  ;immediate is forced long (#>) like every
                                  ;other integer loaded into a data register
                                  ;in this file. A short immediate would be
                                  ;placed in the upper byte of x0 and would
                                  ;give a wrong count for RingBuffLen < 256.
        move #>RingBuffLen,x0
        .loop x0                  ;zero the ring buffer
          move a,Y:(r5)+
        .endl
        
        clr a  #Corel,r4        ;r4 to address the auto-correlation buffer
        move #(3*(CorelLen+1)-1),m4
        move #<3,n4
        .loop #3*(CorelLen+1)   ;zero the auto-correlation function
          move a,X:(r4)+
        .endl

        move #Window,r1         ;r1 to address the Filter (X) and the Window (Y)
        move #2*CorelLen,m1
        move #CorelLen,n1
                                ;compute the window's shape
                                ;L:Phase holds the phase accumulator:
                                ;X:Phase = current phase, Y:Phase = phase step,
                                ;in the units of IQ (1.0 = PI). IQ returns
                                ;the sine of the phase in b.
                                ;The phase is written back from a1, that is
                                ;without limiting, so that it wraps around
                                ;when the sum reaches 1.0. Storing "a" would
                                ;saturate there and freeze the phase, which
                                ;spoils the second half of the window
                                ;whenever the total span exceeds PI
                                ;(SuperSuppression).
      if AlternateWindow
                                ;first pass: window = sin(x), x = 0..PI
        clr a #1.0/(2*CorelLen+1),b
        move ab,L:<Phase
        .loop #(2*CorelLen+1)
           move a,x0
           jsr <IQ
           move b,Y:(r1)+
           move L:<Phase,ab
           add b,a
           move a1,X:<Phase
        .endl
                                ;second pass:
                                ;window = 3/4*window - 1/4*sin(3*x)
        clr a #3.0/(2*CorelLen+1),b
        move ab,L:<Phase
        .loop #(2*CorelLen+1)
           move a,x0
           jsr <IQ
           move Y:(r1),x1
           move b,x0
           move #-0.25,y0
           move #0.75,y1
           mpy x0,y0,b
           macr x1,y1,b
           move b,Y:(r1)+
           move L:<Phase,ab
           add b,a
           move a1,X:<Phase
        .endl
      else
                                ;window = sin(x)^2, x = 0..PI
                                ;(x = 0..2*PI with SuperSuppression, which
                                ;puts a zero at the centre of the window)
       if SuperSuppression
        clr a #2.0/(2*CorelLen+1),b
       else
        clr a #1.0/(2*CorelLen+1),b
       endif
        move ab,L:<Phase
        .loop #(2*CorelLen+1)
           move a,x0
           jsr <IQ
           move b,x0
           mpy x0,x0,b
           move b,Y:(r1)+
           move L:<Phase,ab
           add b,a
           move a1,X:<Phase
        .endl
      endif

                        ;initialize input/output control words in the buffer
                        ;zero input/output data
                        ;(ctrlcd and opencd are not defined in this file,
                        ;presumably they are macros from the 'leonid' include)
      if EVM56K ;for EVM56002 use the microphone input
        ctrlcd  1,r2,BufLen,MIC,0.0,0.0,LINEO|HEADP,0.0,0.0
      else      ;for DSPCARD4 use the LINE input
        ctrlcd  1,r2,BufLen,LINEI,0.0,0.0,LINEO|HEADP,0.0,0.0
      endif
        opencd SampleFreq/1000.0,HPF    ;start taking samples at given rate:
                                        ;8000.0, 9600.0, 16000.0, etc.

        jmp <BatchLoop                  ;enter the main loop

        if SPY
                                ;Serial "spy" routines. Both save a, x0 and x1
                                ;in SpySave/SpyCount and restore them on exit.
                                ;X:SpyCount is the number of words still to be
                                ;sent for the current request.
                                ;lookc and putc are not defined in this file
                                ;(presumably LEONID serial macros); as used
                                ;here lookc returns with carry set when there
                                ;is nothing to act on, otherwise x0 is compared
                                ;with the character 'S'

                                ;SpySync: look for an 'S' request, answer 'P'
SpySync move a10,L:<SpySave     ;output: carry=1 => no spy request
        move a2,X:<SpySave+1    ;carry=0 => spy request !
        move x0,Y:<SpySave+1    ;512 words (jsr <SpyA) must follow
        move x1,Y:<SpyCount
        lookc 0
        jcs <Spy_end
        move #>'S',a
        cmp x0,a
        ori #$01,ccr            ;preset carry = "no request"
        jne <Spy_end            ;(still tests the result of the cmp)
        move #>'P',x0
        putc
        move #>512,a
        move a,X:<SpyCount
        andi #$FE,ccr           ;carry = 0: request accepted
        jmp <Spy_end

                                ;SpyA: send the upper 16 bits of a1 as two
                                ;bytes while a request is being served
SpyA    move a10,L:<SpySave
        move a2,X:<SpySave+1
        move x0,Y:<SpySave+1
        move x1,Y:<SpyCount
        move X:<SpyCount,a
        tst a
        jne <Spy_copy           ;words outstanding: send this one

Spy_check                       ;idle: is there a new request ?
        lookc 0
        jcs <Spy_end
        move #>'S',a
        cmp x0,a
        jne <Spy_end
        move #>'P',x0
        putc
        move #>512,a
Spy_copy
        move #>1,x0
        sub x0,a
        move a,X:<SpyCount      ;one word less to go

        move X:<SpySave,a       ;the saved a1
	rep	#8
	lsr	a
	move	a1,x0           ;a1 shifted right by 8 bits
	putc
        move X:<SpySave,a
	rep	#16
	lsr	a
	move	a1,x0           ;a1 shifted right by 16 bits
        putc

Spy_end move L:<SpySave,a10     ;restore the saved registers
        move X:<SpySave+1,a2
        move Y:<SpySave+1,x0
        move Y:<SpyCount,x1
        rts

        endif

PI      equ     3.14159265358979323846

;this routine computes a cosine/sine pair using the sine ROM
;with a second order (linear+quadrature) approximation between table points
;input:  x0 = angle ( -1 = -PI, +1 = +PI)
;output: a = cosine, b = sine
;the upper 8 bits of the angle select the table entry, the remaining
;bits (y0) are used for the interpolation
IQ                              ;x0 = angle ( -1 = -PI, +1 = +PI)
        ori #%00000100,omr      ;enable the sine ROM table
        move #>$80,x1   ;shift out 8 most significant bits
        mpy x0,x1,a  #>$FF,x0
        move x0,m0      ;m0 = $FF: r0 wraps within the 256 table entries
        and x0,a     #>$100,x0
        or x0,a      #<$40,n0   ;n0 = $40: a quarter of the table ahead
        move a1,r0      ;put the 8 most significant bits into r0 with offset = $100
        move a0,y0      ;save the remaining bits in y0
        jclr #23,y0,SinTable_lev2
          move (r0)+    ;top bit of the remainder set: use the next entry
SinTable_lev2
        move Y:(r0+n0),x0       ;x0 = coarse cosine
        move Y:(r0),x1          ;x1 = coarse sine
        mpyr x1,y0,a  #PI/256.0,y1
        tfr x0,a  a,x1
        macr -x1,y1,a           ;a = fine cosine
        mpyr x0,y0,b  Y:(r0),x1
        andi #%11111011,omr     ;disable the sine ROM table
        tfr x1,b  b,x1
        macr x1,y1,b  #PI*PI/2.0/65536.0,y1  ;b = fine sine
        mpyr y0,y0,a  a,x0
        move a,y0
        mpyr y0,y1,a
        tfr x0,a  a,y1
        macr -x0,y1,a  b,x1     ;a = super fine cosine
        macr -x1,y1,b           ;b = super fine sine
        rts                     ;x,y are modified
                                ;r0,m0,n0 are modified
                                ;maximum error is about 0.7E-6
                                ;execution time 4+64+4 clock cycles
                                ;including "jsr <IQ" and "rts"

        LOMEM X:$0000,Y:$0000,L:$0000
        HIMEM X:$00FF,Y:$00FF,L:$00FF
                        ;the variables below are placed in the first 256
                        ;words, they are accessed with short addresses

        org L:user_data
        if SPY
SpySave dc 0,0          ;saved a (X/Y halves), a2 and x0 for the spy routines
SpyCount dc 0           ;X: spy words still to send, Y: saved x1
        endif

Phase   ds 1            ;for window initialization

LastL = *
        org X:LastL     ;continue X and Y after the L: variables
        org Y:LastL

        org Y:
DCleft  dc 0            ;DC bias for both channels
DCright dc 0
	
        org X:
RMSleft  dc 0           ;RMS (energy) for both channels
RMSright dc 0

	org X:
AGCcount dc 0           ;counter for the AGC hold-off

	org X:
PrevInp dc 0            ;previous sample for InpDiff
PrevOut dc 0            ;previous sample for OutDiff

NotchWeight dc 0.80     ;only referenced by commented-out code
FilterRef dc 0          ;autocorrelation element #0, used for normalization

	org X:
AuxStore dsm BatchLen   ;auxiliary buffer
	
	org Y:          ;factors for the low pass filters to average the auto-corel.
LowPassW0 dc 1.0-2.5*BatchLen*0.5/IntegLen
LowPassW1 dc 2.5*BatchLen*1.0/IntegLen
LowPassW2 dc 2.5*BatchLen*0.5/IntegLen
			;the above low pass is used for continuous averaging
			;of the auto-correlation function
			;(W0 + W1 - W2 = 1)

      if EVM56K
        LOMEM X:$2000,Y:$0100,L:$2000
        HIMEM X:$3FFF,Y:$3FFF,L:$3FFF
      else
        LOMEM X:$0100,Y:$0100,L:$0100
        HIMEM X:$1FFF,Y:$3FFF,L:$1FFF
      endif

      if EVM56K
        org X:$2000
        org Y:$2000
      else
        org X:$100
        org Y:$100
      endif

        org X:
Buffer   dsm BufLen*4   ;CODEC's input buffer
        org Y:
	 dsm BufLen*4   ;CODEC's output buffer
	
	org X:
Filter   dsm 2*CorelLen+1       ;filter formed from the autocorrelation
	org Y:
Window   dsm 2*CorelLen+1       ;window applied to the autocorrelation
				;it is essential that Filter and Window
				;are at same offset.
				;(both are addressed through r1)
	org X:
Corel    dsm 3*(CorelLen+1)     ;autocorrelation function
				;there are three elements per time element
				;which serve as state variables for
                                ;the IIR low-pass filter
	org Y:
RingBuff dsm RingBuffLen         ;ring buffer

	end
